import requests
from bs4 import BeautifulSoup

from fileutil import *
from printutil import *


# Fetch one page of the novel site.
def get_index(url):
    """Download *url* and return its body decoded as GBK.

    Returns the page text on HTTP 200, or None when the request fails
    (timeout, connection error) or the server answers with any other
    status code.
    """
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': 'UM_distinctid=17ee82534df9eb-0cc1fd23688306-978183a-1fa400-17ee82534e0b56; PHPSESSID=2fko4c67849u6iem3kko4jcgb1; 989ab0c4bb7207be7ca7f4b4442e8fd2=1; obj=1; a8a991136c6cefaae40db10eb3a2b33e=1; 304c576e471cbdf63cf89651c1ac178d=1; d6ba8ae1fb713227bd6b86486bede7cc=1; CNZZDATA1277809228=65503984-1644564889-https%253A%252F%252Fwww.baidu.com%252F%7C1645169692; 796ab53acf966fbacf8f078ecd10a9ce=a%3A1%3A%7Bi%3A116472%3Bs%3A44%3A%2289578187%7C%2A%7C%E7%AC%AC219%E7%AB%A0%E7%9A%84%E5%A4%A9%E8%B5%8B%E4%B8%8E%E6%96%B0%E5%B9%B4%E6%B4%BB%E5%8A%A8%22%3B%7D',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Mobile Safari/537.36',
        # 'accept-encoding': 'gzip, deflate, br',
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        # The site serves GBK-encoded Chinese text; override requests'
        # guessed encoding before reading .text.
        response.encoding = 'gbk'
        if response.status_code == 200:
            return response.text
        # Non-200: make the None return explicit instead of falling through.
        return None
    except requests.RequestException as e:
        print('获取网页数据异常：'+str(e))
        return None


# Process chapter pages, following "next" links until the book's index
# page is reached.
def task(url):
    """Download the chapter at *url*, save it, then follow next-page links.

    Iterates (rather than recursing, which would exhaust the recursion
    limit on long books) until either the next link equals ``end_url``
    (end of book, signalled by ``write_txt`` returning False) or a page
    fails to download.
    """
    while True:
        print(url)
        data = get_index(url)
        # print(data)
        if data is None:
            # Download failed; stop here — the bookmark file still holds
            # the last successful chapter, so a rerun resumes cleanly.
            return
        soup = BeautifulSoup(data, "html.parser")

        name = soup.find("h1").text
        content = soup.find("div", attrs={'class': content_class}).get_text()
        write_txt(content, name.strip(), 1)

        # Extract the link to the next page / next chapter.
        product_links = soup.find('div', attrs={'class': url_class}).findAll('a', href=True)
        next_page = product_links[url_index]['href']
        # Persist the last downloaded chapter URL so the next run can
        # start directly from the newest chapter.
        is_task = write_txt(next_page, name.strip(), 2)
        print(is_task)

        # time.sleep(20)
        # Continue with the next page of this chapter, or the next chapter.
        if not is_task:
            return
        url = str_host + next_page


# Write to a file.
def write_txt(content, name, type):
    """Persist chapter text or the next-chapter bookmark.

    type == 1: append *content* to ``<root_path>/<name>.txt`` (chapter body).
    type == 2 (any other value): *content* is the next-page relative URL;
    write it to the bookmark file, or return False when it equals
    ``end_url`` (the book's index page, i.e. no further chapter).

    Returns True to keep downloading, False to stop.
    NOTE: the third parameter shadows the builtin ``type``; the name is
    kept unchanged to preserve the function's interface.
    """
    if type == 1:
        print("当前章节-----> "+name)
        # "with" guarantees the handle is closed even if the write raises.
        with open(root_path+name+".txt", 'a+', encoding='utf8') as chapter_file:
            chapter_file.write(content)
        return True
    else:
        print(content)
        if end_url == content:
            # The "next" link points back at the index page: end of book.
            return False
        # Overwrite the bookmark file with the latest chapter URL.
        with open(root_path + zhang_jie_name, 'w', encoding='utf8') as bookmark_file:
            bookmark_file.write(content)
        return True


# Read a downloaded chapter aloud to the console.
def read_txt():
    """Print the first saved chapter, wrapped to 20 characters per line.

    Skips the bookmark file and stops after the first chapter file found.
    """
    file_names = os.listdir(root_path)
    print(file_names)
    for item in file_names:
        if zhang_jie_name == item:
            continue
        # Close the handle deterministically (the original leaked it).
        with open(root_path + item, 'r', encoding='utf8') as txt_file:
            str_txt = txt_file.read()
        line_len = 20
        # Stepped range prints every slice including a short trailing
        # remainder — and also handles files shorter than one full line,
        # which the old len//line_len arithmetic silently skipped.
        for start in range(0, len(str_txt), line_len):
            print(str_txt[start:start + line_len])
        print_leng(len(str_txt))
        print_()
        break


# Delete a chapter file after it has been read.
def remove_index():
    """Remove the first chapter file in the download directory.

    The bookmark file (``zhang_jie_name``) is skipped explicitly; the
    previous version deleted ``file_names[1]`` blindly, which raised
    IndexError on a near-empty directory and depended on the platform's
    listdir ordering to avoid deleting the bookmark itself.
    """
    file_names = os.listdir(root_path)
    print(file_names)
    for item in file_names:
        if item != zhang_jie_name:
            del_file(root_path + item)
            break
    print(os.listdir(root_path))


# Index page: https://www.yqxs.cc/html/25653/25653296/index.html
# Downloads the chapters of the novel 全民领主 into local text files.
str_host = 'https://www.yqxs.cc'  # site root, prefixed to relative chapter links
str_page = '/html/25653/25653296/703675715.html'  # starting chapter (relative URL)
end_url = '/html/25653/25653296/index.html'  # the last chapter's "next" link points back to the index

# Download directory
root_path = 'd:/xiaoshuo/xiaoshuo_lz/'
zhang_jie_name = 'zhangjie.txt'     # bookmark file storing the URL of the most recently downloaded chapter
content_class = 'showtxt'               # class name of the <div> holding the chapter body text
url_class = 'page_chapter'       # class name of the <div> holding the next-chapter links
url_index = 2                       # index of the next-chapter <a> within that div's link list


if __name__ == '__main__':
    if not os.path.exists(root_path):
        # First run: create the download directory and seed the bookmark
        # file with the starting chapter URL.
        os.makedirs(root_path)
        with open(root_path + zhang_jie_name, 'a+', encoding='utf8') as file_txt:
            file_txt.write(str_page)
    # Resume from the last saved chapter URL. strip() guards against a
    # stray trailing newline, which would corrupt the request URL.
    with open(root_path + zhang_jie_name, 'r', encoding='utf8') as bookmark:
        str_page = bookmark.readline().strip()

    task(str_host+str_page)   # download the chapters
    # read_txt()       # read a chapter's content
    # remove_index()     # delete a chapter after reading it

